import codecs
from fairseq.data import Dictionary
from fairseq.tokenizer import tokenize_line

# Purpose: normalize an event-token file against a fixed fairseq dictionary.
# Every token not present in the dictionary is replaced by the (escaped)
# unknown symbol: each line is encoded to ids with add_if_not_exist=False,
# then decoded straight back to a token string.
input_path = '/home/rickwwang/project_own/story_generation/data/test.wp_target_500.srl2.format.event'
dict_path = '/home/rickwwang/project_research/fairseq/data-bin/writingPrompts-prompt2srl2_500/dict.event.txt'
result_path = input_path + '.replace'

dd = Dictionary.load(dict_path)

# Read inside a context manager so the input handle is closed deterministically
# (the original left the file object from codecs.open(...) dangling).
with codecs.open(input_path, 'r', encoding='utf8') as fin:
    lines = fin.readlines()

converted = []
for line in lines:
    # add_if_not_exist=False maps OOV tokens to the unk id instead of growing
    # the dictionary; append_eos=False keeps the token count unchanged.
    ids = dd.encode_line(
        line=line.strip(),
        line_tokenizer=tokenize_line,
        add_if_not_exist=False,
        consumer=None,
        append_eos=False,
        reverse_order=False,
    )
    # escape_unk=True renders unk ids as the escaped unknown-word string
    # rather than the raw original token.
    converted.append(dd.string(ids, None, escape_unk=True))

with codecs.open(result_path, 'w', encoding='utf8') as fout:
    fout.write('\n'.join(converted))
    fout.write('\n')